*****************************************************************************
**                      Replication do-file for article                    **
** Continuity Trumps? The impact of interviewer change on item nonresponse **
**                  authors: Kristin Hajek & Nina Schumann                 ** 
*****************************************************************************

*********************
** 6) Descriptives **
*********************

** Table 1:

* Load the analysis sample. A forward slash is used as directory separator
* because Stata accepts it on Windows, macOS and Unix alike.
use "${Intwechsel}/4_sample.dta", clear

* total number of respondents
tab wave	// W1: 6,560  W2: 7,302  W3: 6,526  W4: 6,028  W5: 5,471  W6: 5,010  W7: 4,270

* total number of interviewers per wave:
* tag exactly one observation per interviewer-wave combination (observations
* with a missing intid are never tagged) and tabulate the tagged observations
* by wave. Only official Stata is needed for this (no egenmore / nvals()).
egen byte int_wave_tag = tag(intid wave)
tab wave if int_wave_tag	// W1: 417  W2: 335  W3: 315  W4: 298  W5: 286  W6: 266  W7: 247
drop int_wave_tag

* number of new interviewers:
* with the data ordered by interviewer and wave, an observation whose
* predecessor belongs to a different interviewer is that interviewer's first
* record, so the count for wave w is the number of interviewers whose first
* record falls into wave w
sort intid wave
forvalues w = 2/7 {
	count if wave==`w' & intid!=intid[_n-1]
}
* recorded results: W2: 35  W3: 19  W4: 21  W5: 19  W6: 6  W7: 11

* number of previous-wave non-respondents, by wave
tabulate wave last_nonres	// 0, 0, 449, 280, 233, 190, 141

* number of respondents' short- and long-distance moves, by wave
tabulate wave move_close	// 0, 725, 831, 784, 740, 726, 556
tabulate wave move_dist	// 0, 58, 114, 106, 107, 100, 67

* share (%) of respondents who experience an interviewer change, by wave
* (column percentages; six values were recorded, presumably for waves 2-7)
tabulate intchange wave, column	// 15.7 , 10.7 , 7.1 , 8.0 , 6.0 , 5.7

*****************************************************************************

** 6.1 Additional information:

* How many respondents experience an interviewer change across the waves:
* works on the data currently in memory; they are turned into one row per
* respondent with one intchange column per wave, and for each respondent the
* wave columns equal to 1 are counted
keep id wave intchange cohort
reshape wide intchange, i(id) j(wave)
*browse
egen number = anycount(intchange?), values(1)
tabulate number, missing	//  36 % ever experience interviewer change

* probability of interviewer change by previous-wave non-response
* (column percentages); the path uses a forward slash so that it resolves on
* every operating system
use "${Intwechsel}/4_sample.dta", clear
tab intchange last_nonres, col	// 29.9% vs. 7.2%

* interviewer change because of interviewer dropout?
* (forward slash in the path so that it resolves on every operating system)
use "${Intwechsel}/4_sample.dta", clear
tab exityear, m

* helpichange is defined only for observations with an interviewer change
* (missing otherwise): 1 if the exityear stored on the respondent's preceding
* record equals the current wave, 0 if not.
* bysort id (wave) orders the records by wave within respondent, so [_n-1] is
* the same respondent's previous record; _n>1 excludes the first record of
* each respondent, which has no predecessor.
* NOTE(review): exityear is compared directly with wave, so it is presumably
* coded in wave units - confirm. The preceding record is the respondent's
* last available interview, which need not be the immediately preceding wave.
gen byte helpichange = 0 if intchange==1
bysort id (wave): replace helpichange = 1 if intchange==1 & _n>1 & exityear[_n-1]==wave
tab helpichange	// 58% of interviewer change goes hand in hand with interviewer dropout

* respondent / interviewer gender and age
* (forward slash in the path so that it resolves on every operating system)
use "${Intwechsel}/4_sample.dta", clear
tab sex_gen wave, col	// ~53% female respondents
tab intage wave, m	// minimum interviewer age wave 1: 22  max: 82

* reduce the data to one record per interviewer and wave so that every
* interviewer enters the interviewer-level tables once per wave
bysort intid wave: keep if _n==_N
tab intsex wave, col		// interviewer gender per wave: 43%, 44%, 44%, 42%, 40%, 41%, 42%
bysort wave: sum intage		// interviewer age per wave: 56, 59, 60, 62, 62, 63, 64

*****************************************************************************

** Figure A.1:

* age difference respondents-interviewers:
* histogram of respondent age with the histogram of interviewer age overlaid,
* both as frequencies (number of interviews)
* (forward slash in the path so that it resolves on every operating system)
use "${Intwechsel}/4_sample.dta", clear
tab age, m
hist age, frequency color(gs7) ///
	addplot(hist intage, frequency color(gs12)) ///
	scheme(s1mono) xtitle("age") ytitle("Number of Interviews") ///
	legend(label(1 "Age structure respondents") ///
	       label(2 "Age structure interviewers") position(10) ring(0))
graph save "agestructure.gph", replace
* NOTE(review): the Stata documentation lists .wmf as a Windows-only export
* format - confirm, and pick another suffix (e.g. .png) on other systems
graph export "agestructure.wmf", replace

*****************************************************************************
	
** Table 2 plus additional information:
	
* nonresponse per wave (waves 1-7) and pooled over all waves
*
* The sample is loaded once; each wave is selected with an if-qualifier
* instead of reloading the file and keeping a single wave, which yields the
* same statistics. The path uses a forward slash so that it resolves on every
* operating system.
*
* Recorded results
* ----------------
* (a) all CAPI questions
*     wave | varzahl_CAPI      | missings_CAPI     | respondents with | missingsPC_CAPI
*          | min - max - mean  | min - max - mean  | no missings      | min - max - mean
*     1    | 125 - 432 - 248   | 0 - 50 - 5.18     | 11.5%            | 0% - 17.42% - 2.23%
*     2    | 116 - 542 - 310   | 0 - 42 - 4.20     | 18.79%           | 0% - 18.26% - 1.43%
*     3    | 101 - 425 - 240   | 0 - 48 - 4.48     | 14.62%           | 0% - 17.49% - 2.00%
*     4    |  97 - 506 - 264   | 0 - 36 - 3.03     | 27.26%           | 0% - 15.46% - 1.22%
*     5    |  96 - 567 - 263   | 0 - 43 - 3.70     | 22.30%           | 0% - 16.31% - 1.45%
*     6    |  84 - 416 - 256   | 0 - 51 - 2.95     | 28.7%            | 0% - 13.14% - 1.18%
*     7    | 118 - 501 - 287   | 0 - 38 - 3.19     | 23.26%           | 0% - 14.94% - 1.17%
*     all  |  84 - 567 - 267   | 0 - 51 - 3.90     | 20.35%           | 0% - 18.26% - 1.56%
*
* (b) -1: don't know
*     wave | missings_CAPI_1   | respondents with | missingsPC_CAPI_1              | missings_Eink_1
*          | min - max - mean  | no missings      | min - max - mean - std.dev.    | missings: N - %
*     1    | 0 - 32 - 3.61     | 18.22%           | 0% - 10.00% - 1.62% - 1.70     | 2391 - 36.45%
*     2    | 0 - 29 - 2.87     | 26.02%           | 0% - 10% - 0.98% - 1.20        | 2443 - 33.46%
*     3    | 0 - 31 - 3.14     | 20.49%           | 0% - 10% - 1.43% - 1.53        | 2083 - 31.92%
*     4    | 0 - 22 - 1.81     | 40.39%           | 0% - 9.80% - 0.73% - 1.07      | 1563 - 25.93%
*     5    | 0 - 37 - 2.74     | 29.43%           | 0% - 9.80% - 1.07% - 1.38      | 1334 - 24.38%
*     6    | 0 - 26 - 2.02     | 35.97%           | 0% - 8.94% - 0.81% - 1.12      |  982 - 19.60%
*     7    | 0 - 29 - 2.16     | 31.33%           | 0% - 9.47% - 0.80% - 1.06      |  664 - 15.55%
*     all  | 0 - 37 - 2.68     | 28.22%           | 0% - 10.00% - 1.09%            | (not tabulated)
*
* (c) -2: refusal
*     wave | missings_CAPI_2   | respondents with | missingsPC_CAPI_2              | missings_Eink_2
*          | min - max - mean  | no missings      | min - max - mean - std.dev.    | missings: N - %
*     1    | 0 - 28 - 1.57     | 59.60%           | 0% - 9.84% - 0.61% - 1.25      | 693 - 10.56%
*     2    | 0 - 33 - 1.33     | 64.37%           | 0% - 9.85% - 0.45% - 1.08      | 747 - 10.23%
*     3    | 0 - 28 - 1.35     | 63.25%           | 0% - 9.54% - 0.58% - 1.19      | 531 - 8.14%
*     4    | 0 - 25 - 1.22     | 62.31%           | 0% - 9.85% - 0.50% - 1.04      | 451 - 7.48%
*     5    | 0 - 28 - 0.96     | 70.26%           | 0% - 9.03% - 0.38% - 0.91      | 368 - 6.73%
*     6    | 0 - 38 - 0.92     | 72.32%           | 0% - 9.79% - 0.37% - 0.94      | 383 - 7.64%
*     7    | 0 - 25 - 1.03     | 68.52%           | 0% - 9.20% - 0.37% - 0.88      | 355 - 8.31%
*     all  | 0 - 38 - 1.23     | 65.31%           | 0% - 9.85% - 0.47%             | (not tabulated)

use "${Intwechsel}/4_sample.dta", clear
tab wave

* --------------------------------------------------------------------
* nonresponse waves 1 to 7
forvalues w = 1/7 {
	display as text _newline "nonresponse wave `w'"
	// all CAPI questions
	sum varzahl_CAPI if wave==`w'
	sum missings_CAPI if wave==`w'
	tab missingsPC_CAPI if wave==`w', m
	sum missingsPC_CAPI if wave==`w'
	// -1: don't know
	sum missings_CAPI_1 if wave==`w'
	tab missingsPC_CAPI_1 if wave==`w', m
	sum missingsPC_CAPI_1 if wave==`w'
	tab missings_Eink_1 if wave==`w'
	// -2: refusal
	sum missings_CAPI_2 if wave==`w'
	tab missingsPC_CAPI_2 if wave==`w', m
	sum missingsPC_CAPI_2 if wave==`w'
	tab missings_Eink_2 if wave==`w'
}

* --------------------------------------------------------------------
* all waves
* all CAPI questions
sum varzahl_CAPI
sum missings_CAPI
tab missingsPC_CAPI, m
sum missingsPC_CAPI
* -1: don't know
sum missings_CAPI_1
tab missingsPC_CAPI_1, m
sum missingsPC_CAPI_1
* -2: refusal
sum missings_CAPI_2
tab missingsPC_CAPI_2, m
sum missingsPC_CAPI_2

*****************************************************************************

** Table A1:

* nonresponse per cohort
* (_1: don't know, _2: refusal; forward slash in the path so that it resolves
* on every operating system)
use "${Intwechsel}/4_sample.dta", clear
bysort cohort: sum missingsPC_CAPI_1	// C1 mean: 1.5 - C1 std.dev.: 1.5
										// C2 mean: 0.9 - C2 std.dev.: 1.3
										// C3 mean: 0.8 - C3 std.dev.: 1.2
bysort cohort: sum missingsPC_CAPI_2	// C1 mean: 0.5 - C1 std.dev.: 1.1
										// C2 mean: 0.4 - C2 std.dev.: 1.0
										// C3 mean: 0.5 - C3 std.dev.: 1.1
bysort cohort: tab missings_Eink_1		// C1 N: 9,024 - C1 percent: 57.1
										// C2 N: 1,447 - C2 percent: 12.3
										// C3 N: 989 - C3 percent: 7.3%
bysort cohort: tab missings_Eink_2		// C1 N: 862 - C1 percent: 5.5%
										// C2 N: 1,081 - C2 percent: 9.2%
										// C3 N: 1,585 - C3 percent: 11.7%

*****************************************************************************

** Number of don't know answers / refusals per respondent over the waves:

* One row per respondent with one missings_Eink_1 (-1: don't know) and one
* missings_Eink_2 (-2: refusal) column per wave; number1 / number2 count, per
* respondent, the wave columns equal to 1.
* After the reshape the wave number is appended to the stub, so the wildcard
* missings_Eink_1? matches missings_Eink_11 ... missings_Eink_17 and
* missings_Eink_2? matches missings_Eink_21 ... missings_Eink_27.
* (forward slash in the path so that it resolves on every operating system)
use "${Intwechsel}/4_sample.dta", clear
keep id wave missings_Eink_1 missings_Eink_2
reshape wide missings_Eink_1 missings_Eink_2, i(id) j(wave)
egen number1 = anycount(missings_Eink_1?), values(1)
egen number2 = anycount(missings_Eink_2?), values(1)
tab number1, m			// 0:51.2%, 1:16.9%, 2:12.3%, 3:7.2%, 4:4.9%, 5:3.3%, 6:2.6%, 7:1.8%
tab number2, m			// 0:78.4%, 1:12.9%, 2:4.5%, 3:1.8%, 4:1.1%, 5:0.6%, 6:0.4%, 7:0.2%

* Number of waves per respondent WITHOUT any CAPI item nonresponse:
* values(0) counts the wave columns whose missings count equals 0, so
* number1 is the number of waves without a don't know answer (-1) and
* number2 the number of waves without a refusal (-2). Waves with no record
* for a respondent are missing after the reshape and are not counted.
* Only the count variables are kept; the wildcard missings_CAPI_1? matches
* missings_CAPI_11 ... missings_CAPI_17 (likewise for missings_CAPI_2?).
* (forward slash in the path so that it resolves on every operating system)
use "${Intwechsel}/4_sample.dta", clear
keep id wave missings_CAPI_1 missings_CAPI_2
reshape wide missings_CAPI_1 missings_CAPI_2, i(id) j(wave)
egen number1 = anycount(missings_CAPI_1?), values(0)
egen number2 = anycount(missings_CAPI_2?), values(0)
tab number1, m			// 0:42.8%, 1:23.5%, 2:14.7%, 3:8.5%, 4:5.1%, 5:3.3%, 6:1.5%, 7:0.6%
tab number2, m			// 0:11.1%, 1:16.7%, 2:21.7%, 3:13.4%, 4:10.9%, 5:10.3%, 6:9.6%, 7:6.3%

*****************************************************************************





